In [1]:
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns
import plotly.express as px

import plotly.io as pio

pio.renderers.default = "notebook"
In [2]:
world_data = pd.read_csv('Data/our-world-can.csv')
In [3]:
world_data
Out[3]:
Entity Code Day total_tests 142601-annotations Total confirmed deaths due to COVID-19 Total confirmed cases of COVID-19
0 2020 Summer Olympics athletes & staff NaN 2021-06-19 NaN NaN NaN 1.0
1 2020 Summer Olympics athletes & staff NaN 2021-06-20 NaN NaN NaN 1.0
2 2020 Summer Olympics athletes & staff NaN 2021-06-21 NaN NaN NaN 1.0
3 2020 Summer Olympics athletes & staff NaN 2021-06-22 NaN NaN NaN 1.0
4 2020 Summer Olympics athletes & staff NaN 2021-06-23 NaN NaN NaN 1.0
... ... ... ... ... ... ... ...
105718 Zimbabwe ZWE 2021-07-19 NaN NaN 2697.0 85732.0
105719 Zimbabwe ZWE 2021-07-20 NaN NaN 2747.0 88415.0
105720 Zimbabwe ZWE 2020-03-20 NaN NaN NaN 1.0
105721 Zimbabwe ZWE 2020-03-21 NaN NaN NaN 3.0
105722 Zimbabwe ZWE 2020-03-22 NaN NaN NaN 3.0

105723 rows × 7 columns

In [4]:
world_data = world_data.rename(columns={'Entity': 'Country', 'total_tests': 'Number of Tests Performed', '142601-annotations': 'Tested T/F', 'Total confirmed deaths due to COVID-19': 'Deaths', 'Total confirmed cases of COVID-19': 'Cases'})
In [5]:
world_data
Out[5]:
Country Code Day Number of Tests Performed Tested T/F Deaths Cases
0 2020 Summer Olympics athletes & staff NaN 2021-06-19 NaN NaN NaN 1.0
1 2020 Summer Olympics athletes & staff NaN 2021-06-20 NaN NaN NaN 1.0
2 2020 Summer Olympics athletes & staff NaN 2021-06-21 NaN NaN NaN 1.0
3 2020 Summer Olympics athletes & staff NaN 2021-06-22 NaN NaN NaN 1.0
4 2020 Summer Olympics athletes & staff NaN 2021-06-23 NaN NaN NaN 1.0
... ... ... ... ... ... ... ...
105718 Zimbabwe ZWE 2021-07-19 NaN NaN 2697.0 85732.0
105719 Zimbabwe ZWE 2021-07-20 NaN NaN 2747.0 88415.0
105720 Zimbabwe ZWE 2020-03-20 NaN NaN NaN 1.0
105721 Zimbabwe ZWE 2020-03-21 NaN NaN NaN 3.0
105722 Zimbabwe ZWE 2020-03-22 NaN NaN NaN 3.0

105723 rows × 7 columns

In [6]:
can = world_data[world_data['Country'] == 'Canada']
usa = world_data[world_data['Country'] == 'United States']

usa = usa.drop([98765,98764,98763,98762])
In [7]:
can
Out[7]:
Country Code Day Number of Tests Performed Tested T/F Deaths Cases
16800 Canada CAN 2020-01-31 0.0 tests performed NaN 4.0
16801 Canada CAN 2020-02-08 63.0 tests performed NaN 7.0
16802 Canada CAN 2020-02-16 109.0 tests performed NaN 7.0
16803 Canada CAN 2020-02-21 166.0 tests performed NaN 9.0
16804 Canada CAN 2020-02-24 212.0 tests performed NaN 10.0
... ... ... ... ... ... ... ...
17337 Canada CAN 2020-02-22 NaN NaN NaN 9.0
17338 Canada CAN 2020-02-23 NaN NaN NaN 9.0
17339 Canada CAN 2020-02-28 NaN NaN NaN 15.0
17340 Canada CAN 2020-03-02 NaN NaN NaN 32.0
17341 Canada CAN 2020-03-04 NaN NaN NaN 42.0

542 rows × 7 columns

In [8]:
usa
Out[8]:
Country Code Day Number of Tests Performed Tested T/F Deaths Cases
98220 United States USA 2020-03-01 348.0 tests performed 1.0 32.0
98221 United States USA 2020-03-02 861.0 tests performed 6.0 55.0
98222 United States USA 2020-03-03 1480.0 tests performed 7.0 74.0
98223 United States USA 2020-03-04 2370.0 tests performed 11.0 107.0
98224 United States USA 2020-03-05 3587.0 tests performed 12.0 184.0
... ... ... ... ... ... ... ...
98757 United States USA 2020-02-20 NaN NaN NaN 14.0
98758 United States USA 2020-02-21 NaN NaN NaN 16.0
98759 United States USA 2020-02-22 NaN NaN NaN 16.0
98760 United States USA 2020-02-23 NaN NaN NaN 16.0
98761 United States USA 2020-02-24 NaN NaN NaN 16.0

542 rows × 7 columns

In [9]:
can_pivot = can.pivot_table(index = 'Day', values = ['Cases'], aggfunc='sum').reset_index()
usa_pivot = usa.pivot_table(index = 'Day', values = ['Cases'], aggfunc='sum').reset_index()
In [10]:
can_pivot.insert(0, 'Country', ['Canada']*542, True)
can_pivot
Out[10]:
Country Day Cases
0 Canada 2020-01-26 1.0
1 Canada 2020-01-27 1.0
2 Canada 2020-01-28 2.0
3 Canada 2020-01-29 2.0
4 Canada 2020-01-30 2.0
... ... ... ...
537 Canada 2021-07-16 1430127.0
538 Canada 2021-07-17 1430437.0
539 Canada 2021-07-18 1430740.0
540 Canada 2021-07-19 1431378.0
541 Canada 2021-07-20 1431691.0

542 rows × 3 columns

In [11]:
usa_pivot.insert(0, 'Country', ['United States']*542, True)
usa_pivot
Out[11]:
Country Day Cases
0 United States 2020-01-22 1.0
1 United States 2020-01-23 1.0
2 United States 2020-01-24 2.0
3 United States 2020-01-25 2.0
4 United States 2020-01-26 5.0
... ... ... ...
537 United States 2021-07-16 34054952.0
538 United States 2021-07-17 34067912.0
539 United States 2021-07-18 34079960.0
540 United States 2021-07-19 34132071.0
541 United States 2021-07-20 34174774.0

542 rows × 3 columns

In [12]:
added_data = pd.concat([usa_pivot, can_pivot])
added_data
Out[12]:
Country Day Cases
0 United States 2020-01-22 1.0
1 United States 2020-01-23 1.0
2 United States 2020-01-24 2.0
3 United States 2020-01-25 2.0
4 United States 2020-01-26 5.0
... ... ... ...
537 Canada 2021-07-16 1430127.0
538 Canada 2021-07-17 1430437.0
539 Canada 2021-07-18 1430740.0
540 Canada 2021-07-19 1431378.0
541 Canada 2021-07-20 1431691.0

1084 rows × 3 columns

In [13]:
usa_pivot2 = usa_pivot.copy()
can_pivot2 = can_pivot.copy()

usa_pivot2.insert(3, 'Cases / Population', usa_pivot['Cases']/328200000, True)
can_pivot2.insert(3, 'Cases / Population', can_pivot['Cases']/37590000, True)

usa_pivot2 = usa_pivot2.drop(columns='Cases')
cam_pivot2 = can_pivot2.drop(columns='Cases')
In [14]:
usa_pivot2
Out[14]:
Country Day Cases / Population
0 United States 2020-01-22 3.046923e-09
1 United States 2020-01-23 3.046923e-09
2 United States 2020-01-24 6.093845e-09
3 United States 2020-01-25 6.093845e-09
4 United States 2020-01-26 1.523461e-08
... ... ... ...
537 United States 2021-07-16 1.037628e-01
538 United States 2021-07-17 1.038023e-01
539 United States 2021-07-18 1.038390e-01
540 United States 2021-07-19 1.039978e-01
541 United States 2021-07-20 1.041279e-01

542 rows × 3 columns

In [15]:
can_pivot2
Out[15]:
Country Day Cases Cases / Population
0 Canada 2020-01-26 1.0 2.660282e-08
1 Canada 2020-01-27 1.0 2.660282e-08
2 Canada 2020-01-28 2.0 5.320564e-08
3 Canada 2020-01-29 2.0 5.320564e-08
4 Canada 2020-01-30 2.0 5.320564e-08
... ... ... ... ...
537 Canada 2021-07-16 1430127.0 3.804541e-02
538 Canada 2021-07-17 1430437.0 3.805366e-02
539 Canada 2021-07-18 1430740.0 3.806172e-02
540 Canada 2021-07-19 1431378.0 3.807869e-02
541 Canada 2021-07-20 1431691.0 3.808702e-02

542 rows × 4 columns

In [16]:
mod_data = pd.concat([usa_pivot2, can_pivot2])
In [17]:
fig = px.line(can_pivot, x = "Day", y = "Cases",
             template = "plotly_dark", title = "Confirmed Cases Over Time in Canada",
              color_discrete_sequence = ['Red'])
fig.show()

fig = px.line(usa_pivot, x = "Day", y = "Cases",
             template = "plotly_dark", title = "Confirmed Cases Over Time in the USA",
              color_discrete_sequence = ['Blue'])
fig.show()
In [18]:
fig = px.line(added_data, x='Day', y='Cases', color='Country', template = "plotly_dark", title = "Confirmed Cases Over Time in the USA vs Canada",)

fig.show()
In [19]:
fig = px.line(mod_data, x='Day', y='Cases / Population', color='Country', template = "plotly_dark", title = "Cases / Population in the USA vs Canada",)

fig.show()